{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pandas equivalents for common SQL operations\n",
    "- this notebook shows the Pandas equivalents for some common SQL operations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import libraries and ingest data\n",
    "- load one sheet from the streetcar delay dataset into a Pandas dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np # linear algebra\n",
    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
    "import datetime\n",
    "import os\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "raw path is C:\\personal\\manning\\deep_learning_for_structured_data\\notebooks\n"
     ]
    }
   ],
   "source": [
    "# get the directory for that this notebook is in\n",
    "rawpath = os.getcwd()\n",
    "print(\"raw path is\",rawpath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "path is C:\\personal\\manning\\deep_learning_for_structured_data\\data\n"
     ]
    }
   ],
   "source": [
    "# data is in a directory called \"data\" that is a sibling to the directory containing the notebook\n",
    "# this code assumes you have copied to this directory all the XLS files from the source dataset: https://www.toronto.ca/city-government/data-research-maps/open-data/open-data-catalogue/#e8f359f0-2f47-3058-bf64-6ec488de52da\n",
    "path = os.path.abspath(os.path.join(rawpath, '..', 'data'))\n",
    "print(\"path is\", path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dataset into Pandas dataframe\n",
    "file = \"Streetcar Jan 2014.csv\"\n",
    "streetcarjan2014=pd.read_csv(os.path.join(path,file))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Show SQL / Pandas equivalent statements\n",
    "- in the following cells, each Python statement is preceded by a comment that shows the SQL that would produce the same result as the Python statement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Report Date</th>\n",
       "      <th>Route</th>\n",
       "      <th>Time</th>\n",
       "      <th>Day</th>\n",
       "      <th>Location</th>\n",
       "      <th>Incident</th>\n",
       "      <th>Min Delay</th>\n",
       "      <th>Min Gap</th>\n",
       "      <th>Direction</th>\n",
       "      <th>Vehicle</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>505</td>\n",
       "      <td>6:31:00 AM</td>\n",
       "      <td>Thursday</td>\n",
       "      <td>Dundas and Roncesvalles</td>\n",
       "      <td>Late Leaving Garage</td>\n",
       "      <td>4</td>\n",
       "      <td>8.0</td>\n",
       "      <td>E/B</td>\n",
       "      <td>4018.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>504</td>\n",
       "      <td>12:43:00 PM</td>\n",
       "      <td>Thursday</td>\n",
       "      <td>King and Shaw</td>\n",
       "      <td>Utilized Off Route</td>\n",
       "      <td>20</td>\n",
       "      <td>22.0</td>\n",
       "      <td>E/B</td>\n",
       "      <td>4128.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>501</td>\n",
       "      <td>2:01:00 PM</td>\n",
       "      <td>Thursday</td>\n",
       "      <td>Kingston road and Bingham</td>\n",
       "      <td>Held By</td>\n",
       "      <td>13</td>\n",
       "      <td>19.0</td>\n",
       "      <td>W/B</td>\n",
       "      <td>4016.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Report Date  Route         Time       Day                   Location  \\\n",
       "0  2014-01-02    505   6:31:00 AM  Thursday    Dundas and Roncesvalles   \n",
       "1  2014-01-02    504  12:43:00 PM  Thursday              King and Shaw   \n",
       "2  2014-01-02    501   2:01:00 PM  Thursday  Kingston road and Bingham   \n",
       "\n",
       "              Incident  Min Delay  Min Gap Direction  Vehicle  \n",
       "0  Late Leaving Garage          4      8.0       E/B   4018.0  \n",
       "1   Utilized Off Route         20     22.0       E/B   4128.0  \n",
       "2              Held By         13     19.0       W/B   4016.0  "
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# select * from streetcarjan2014 limit 3\n",
    "streetcarjan2014.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    504\n",
       "Name: Route, dtype: int64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# single condition on a Select\n",
    "# select \"Route\" from streetcarjan2014 where \"Location \" = 'King and Shaw'\n",
    "streetcarjan2014[streetcarjan2014.Location == \"King and Shaw\"].Route"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Late Leaving Garage', 'Utilized Off Route', 'Held By',\n",
       "       'Investigation', 'Mechanical', 'General Delay',\n",
       "       'Emergency Services', 'Diversion'], dtype=object)"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# unique entries in a column\n",
    "# select distinct \"Incident\" from streetcarjan2014\n",
    "streetcarjan2014.Incident.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Report Date</th>\n",
       "      <th>Route</th>\n",
       "      <th>Time</th>\n",
       "      <th>Day</th>\n",
       "      <th>Location</th>\n",
       "      <th>Incident</th>\n",
       "      <th>Min Delay</th>\n",
       "      <th>Min Gap</th>\n",
       "      <th>Direction</th>\n",
       "      <th>Vehicle</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>305</td>\n",
       "      <td>2014-01-19</td>\n",
       "      <td>504</td>\n",
       "      <td>8:33:00 AM</td>\n",
       "      <td>Sunday</td>\n",
       "      <td>King and Queen</td>\n",
       "      <td>Held By</td>\n",
       "      <td>40</td>\n",
       "      <td>50.0</td>\n",
       "      <td>E/B</td>\n",
       "      <td>4089.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>311</td>\n",
       "      <td>2014-01-19</td>\n",
       "      <td>511</td>\n",
       "      <td>7:17:00 PM</td>\n",
       "      <td>Sunday</td>\n",
       "      <td>Bathurst and Front</td>\n",
       "      <td>Investigation</td>\n",
       "      <td>33</td>\n",
       "      <td>40.0</td>\n",
       "      <td>S/B</td>\n",
       "      <td>4179.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Report Date  Route        Time     Day            Location       Incident  \\\n",
       "305  2014-01-19    504  8:33:00 AM  Sunday      King and Queen        Held By   \n",
       "311  2014-01-19    511  7:17:00 PM  Sunday  Bathurst and Front  Investigation   \n",
       "\n",
       "     Min Delay  Min Gap Direction  Vehicle  \n",
       "305         40     50.0       E/B   4089.0  \n",
       "311         33     40.0       S/B   4179.0  "
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# multiple conditions on a Select\n",
    "# select * from streetcarjan2014 where 'Min Delay' > 20 and Day = \"Sunday\"\n",
    "streetcarjan2014[(streetcarjan2014['Min Delay'] > 20) & (streetcarjan2014['Day'] == \"Sunday\")]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Route</th>\n",
       "      <th>Min Delay</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>148</td>\n",
       "      <td>501</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>271</td>\n",
       "      <td>501</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>277</td>\n",
       "      <td>506</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>296</td>\n",
       "      <td>506</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>295</td>\n",
       "      <td>504</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>298</td>\n",
       "      <td>511</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>297</td>\n",
       "      <td>504</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>293</td>\n",
       "      <td>505</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>300</td>\n",
       "      <td>511</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>311</td>\n",
       "      <td>511</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>304</td>\n",
       "      <td>504</td>\n",
       "      <td>35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>305</td>\n",
       "      <td>504</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>286</td>\n",
       "      <td>512</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>299</td>\n",
       "      <td>511</td>\n",
       "      <td>66</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>291</td>\n",
       "      <td>506</td>\n",
       "      <td>74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>237</td>\n",
       "      <td>504</td>\n",
       "      <td>77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>236</td>\n",
       "      <td>501</td>\n",
       "      <td>93</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Route  Min Delay\n",
       "148    501         22\n",
       "271    501         22\n",
       "277    506         23\n",
       "296    506         25\n",
       "295    504         25\n",
       "298    511         27\n",
       "297    504         28\n",
       "293    505         33\n",
       "300    511         33\n",
       "311    511         33\n",
       "304    504         35\n",
       "305    504         40\n",
       "286    512         42\n",
       "299    511         66\n",
       "291    506         74\n",
       "237    504         77\n",
       "236    501         93"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ORDER BY\n",
    "# select \"Route\", \"Min Delay\" from streetcarjan2014 where \"Min Delay\" > 20 order by \"Min Delay\"\n",
    "streetcarjan2014[['Route','Min Delay']][(streetcarjan2014['Min Delay'] > 20)].sort_values('Min Delay')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
